/*******************************************************************************
Name: dist_gradient
*******************************************************************************/


* Close any log left open by an earlier run, empty memory, and start this
* step's log.
capture log close
clear

log using  "${logdir}5-dist_gradient.log", replace


* Load the establishment panel and keep only the variables used below.
local keepvars UNIT_NBR UNIT_NM addr year TOTAL* BLKT* HDQ_NBR P_NAME ///
	msafips msa_name metarea city state centralcity SIC3 SIC2 firm_size

use "${sensdir}movers_working.dta", clear
keep `keepvars'

* Attach the unit ids used by the address and distance data (many panel rows
* per UNIT_NBR, one row per UNIT_NBR in the map). The merge indicator from
* this step is discarded.
merge UNIT_NBR using "${sensdir}id_map.dta", uniqusing sort
drop _merge

* Attach coordinates, distance measures, and match quality by unit and year.
* The _merge variable created here is kept: the descriptive statistics and the
* sample restriction further down both test _merge == 3.
merge unit_id year using "${sensdir}addresses_merge.dta", ///
	keep(lat lon cbd_lat cbd_lon distv disth match_type) uniqusing sort

* Drop plants reporting zero total employment.
* NOTE(review): rows that exist only in a using file have missing TOTAL10 and
* are therefore NOT removed by this condition - confirm that is intended.
drop if TOTAL10 == 0

/*------------------------------------------------------------*/
*GENERATE ADDITIONAL VARIABLES
/*------------------------------------------------------------*/

* One-digit industry group built from the two-digit SIC code. Each group is
* labelled by the lower bound of its SIC2 band, except the first, which covers
* everything up to 9 and is labelled 7. SIC2 values above 89 (and missing
* SIC2) leave SIC1 missing.
gen SIC1 = 7 if SIC2 <= 9

foreach band in "10 14" "15 19" "20 39" "40 49" "50 51" "52 59" "60 69" "70 89" {
	local lo : word 1 of `band'
	local hi : word 2 of `band'
	replace SIC1 = `lo' if inrange(SIC2, `lo', `hi')
}

* Black share of the establishment's workforce, in percent.
gen pblack = 100*(BLKT10/TOTAL10)

* Log of establishment employment.
gen ln_size = ln(TOTAL10)

* Black share of employment in the metro area and year, in percent, computed
* from the establishment totals in memory.
egen btotal_msa = total(BLKT10), by(metarea year)
egen total_msa  = total(TOTAL10), by(metarea year)
gen pblack_msa  = 100*(btotal_msa/total_msa)
drop btotal_msa total_msa

* Establishment black share relative to its metro-area-year black share.
gen pblack_n = pblack/pblack_msa

* Firm identifier: one integer per headquarters number.
egen firm_id = group(HDQ_NBR)

* Cell identifiers absorbed as fixed effects in the regressions below.
egen msaXindXyear  = group(metarea SIC1 year)
egen msaXfirmXyear = group(metarea firm_id year)
egen indXyear      = group(SIC1 year)

* Flag one observation per unit_id so establishments can be counted.
bysort unit_id: gen first = (_n == 1)



/*------------------------------------------------------------*/
/*------------------------------------------------------------*/
*APPENDIX TABLE A3: DESCRIPTIVE STATISTICS
/*------------------------------------------------------------*/
/*------------------------------------------------------------*/

* Column 1: every observation currently in memory.
tab first
sum TOTAL10
tab SIC1
sum pblack

* Columns 2 and 3: observations matched to the address file (_merge == 3),
* then the subset of those with match_type == 1. Distance is summarized only
* for these two groups.
local matched "_merge == 3"
local exact   "_merge == 3 & match_type == 1"

foreach restr in "`matched'" "`exact'" {
	tab first if `restr'
	sum TOTAL10 if `restr'
	tab SIC1 if `restr'
	sum pblack if `restr'
	sum disth if `restr'
}

* Distance separately for central-city (centralcity == 1) and suburban
* (centralcity == 0) establishments, for both matched groups.
foreach cc in 1 0 {
	sum disth if `matched' & centralcity == `cc'
	sum disth if `exact' & centralcity == `cc'
}


* Report the merge outcome, then keep only address-matched observations.
tab _merge
keep if _merge == 3


*restrict to sample of MSAs and years

* sample = 1 for observations with a non-missing distance (disth) of at most
* 60; the figure below labels this distance in miles. sample_msa flags metro
* areas containing at least one such observation. Only in-sample rows are kept.
gen sample = disth != . & disth <= 60
egen sample_msa = max(sample), by(metarea)
keep if sample_msa == 1 & sample == 1

* The analysis ends in 2000.
drop if year > 2000


compress

*LABEL TIME PERIODS
* Three non-overlapping decades. The third era previously started at 1990,
* which overlapped era 2 (1981-1990): because the replaces run in order, 1990
* observations were silently moved from era 2 to era 3. Era 3 now starts in
* 1991 so each year belongs to exactly one era. Years before 1971 stay missing.
gen era = .
replace era = 1 if year >= 1971 & year <= 1980
replace era = 2 if year >= 1981 & year <= 1990
replace era = 3 if year >= 1991 & year <= 2000

* Working copy of the distance measure used in all regressions below.
gen dist = disth


/*------------------------------------------------------------*/
/*------------------------------------------------------------*/
*TABLE 1: Distance from CBD and Black Share of Employees
/*------------------------------------------------------------*/
/*------------------------------------------------------------*/

* Pooled specifications: normalized black share on distance and log size,
* weighted by establishment employment, standard errors clustered by
* establishment. First with MSA x industry x year cells absorbed, then with
* MSA x firm x year cells absorbed.
foreach cell in msaXindXyear msaXfirmXyear {
	areg pblack_n dist ln_size [aw = TOTAL10], absorb(`cell') cluster(unit_id)
}

* Same specification with MSA x industry x year cells, one era at a time.
forvalues e = 1/3 {
	areg pblack_n dist ln_size [aw = TOTAL10] if era == `e', absorb(msaXindXyear) cluster(unit_id)
}


/*------------------------------------------------------------*/
/*------------------------------------------------------------*/
* FIGURE 1(A): DISTANCE SLOPE FOR EARLY AND LATE PERIODS
/*------------------------------------------------------------*/
/*------------------------------------------------------------*/

* Remove binsreg output left over from a previous run (ignore if absent).
* Paths are quoted so a data directory containing spaces does not break rm.
capture rm "${datadir}est70.dta"
capture rm "${datadir}est00.dta"

* The binsreg output replaces the data in memory below; preserve/restore
* brings the establishment panel back afterwards.
preserve


*non-parametric plot

* Linear slope for 1971-1975 (distance under 40), stored as text for the
* figure annotation.
reg pblack_n dist [aw = TOTAL10] if year >= 1971 & year <= 1975 & dist < 40, cluster(unit_id)
local slope70 "71-75 Slope = `:di %6.3f _b[dist]'"
local slope70_se "(`: di %4.3f _se[dist]')"

*smooth over firms with dist of zero

* Establishments at distance exactly zero get a small uniform jitter in
* [0, 0.1) before binning. The seed is fixed so the jitter, and therefore the
* figure, is reproducible from run to run.
set seed 20240101
gen dist2 = dist
replace dist2 = 0.1*runiform() if dist == 0

binsreg pblack_n dist2 [aw = TOTAL10] if year >= 1971 & year <= 1975 & dist < 40, ///
	line(1 1) cb(1 1) nbins(50) vce(cluster unit_id) savedata(${datadir}est70.dta)



* Linear slope for 1996-2000 (distance under 40).
reg pblack_n dist [aw = TOTAL10] if year >= 1996 & year <= 2000 & dist < 40, cluster(unit_id)
local slope00 "96-00 Slope = `:di %6.3f _b[dist]'"
local slope00_se "(`: di %4.3f _se[dist]')"

* Use the same jittered distance as the early period so both curves are
* binned on the same variable (this call previously used the raw dist).
binsreg pblack_n dist2 [aw = TOTAL10] if year >= 1996 & year <= 2000 & dist < 40, ///
	line(1 1) cb(1 1) nbins(50) vce(cluster unit_id) savedata(${datadir}est00.dta)


* Stack the two saved binsreg datasets; year is only a period label here
* (1970 = early window, 2000 = late window).
use "${datadir}est70.dta", clear
gen year = 1970

append using "${datadir}est00.dta"
replace year = 2000 if year != 1970

keep line_x line_fit CB_l CB_r year

* Fitted line (solid/dash) and confidence-band edges (dotted) for each
* period, each smoothed with a local-linear Gaussian-kernel fit.
twoway (lpoly CB_r line_x if year == 1970, deg(1) kernel(gaussian) lcolor(gs4) lpattern(dot)) ///
	(lpoly CB_l line_x if year == 1970, deg(1) kernel(gaussian) lcolor(gs4) lpattern(dot)) ///
	(lpoly line_fit line_x if year == 1970, deg(1) kernel(gaussian) lcolor(gs4) lpattern(solid)) ///
	(lpoly CB_r line_x if year == 2000, deg(1) kernel(gaussian) lcolor(gs10) lpattern(dot)) ///
	(lpoly CB_l line_x if year == 2000, deg(1) kernel(gaussian) lcolor(gs10) lpattern(dot)) ///
	(lpoly line_fit line_x if year == 2000, deg(1) kernel(gaussian) lcolor(gs10) lpattern(dash)), ///
	legend(order(3 "1971-1975" 6 "1996-2000")) xtitle("Distance from CBD (Miles)") ///
	ytitle("Normalized Black Share of Employees") ///
	plotregion(fcolor(white)) graphregion(fcolor(white)) ///
	text(1.15 25 "`slope70'", place(e)  size(medium)) ///
	text(1.07 34 "`slope70_se'", place(e)  size(medium)) ///
	text(0.95 25 "`slope00'", place(e)  size(medium)) ///
	text(0.87 34 "`slope00_se'", place(e)  size(medium))
	
graph save "${outdir}distslope.gph", replace
graph export "${outdir}distslope.pdf", replace
	
* Clean up the intermediate binsreg files.
rm "${datadir}est70.dta"
rm "${datadir}est00.dta"

restore


/*------------------------------------------------------------*/
* CALCULATE CENTRAL-CITY SHARE OF EMPLOYMENT BY MSA AND PERIOD
* (the split below uses the centralcity flag, not a distance cutoff)
/*------------------------------------------------------------*/

preserve

* Keep the two five-year windows compared in Figure 1(A).
keep if (year >= 1971 & year <= 1975) | (year >= 1996 & year <= 2000)

gen cc = centralcity

* Period label: 1970 for 1971-1975, 2000 for 1996-2000.
gen year2 = 1970 if year <= 1975
replace year2 = 2000 if year >= 1996

* Total employment by metro area, period, and central-city status.
* This is an unweighted sum: the previous version added [aw = TOTAL10], which
* weighted each establishment's employment by its own employment (with
* aweights rescaled to sum to the number of observations), so emp was not a
* headcount and the central-city share below was distorted.
collapse (sum) emp = TOTAL10, by(metarea year2 cc)

rename year2 year



* One row per metro area and period, with central-city and suburban totals
* side by side.
reshape wide emp, i(metarea year) j(cc)

rename emp1 eeoemp_cc
rename emp0 eeoemp_sub

gen eeoemp_tot = eeoemp_cc + eeoemp_sub

* Central-city share of employment in this data, and its log.
gen eeofcc = eeoemp_cc/eeoemp_tot

gen ln_eeofcc = ln(eeofcc)


* Bring in the comparison series from pop_working.dta and keep matched rows.
* NOTE(review): emp_tot_cc and emp_tot come from that file; their definition
* is not visible here - confirm against the file's construction.
merge metarea year using "${datadir}pop_working.dta", sort uniqusing

keep if _merge == 3

gen fcc = emp_tot_cc/emp_tot
gen ln_fcc = ln(fcc)

* Two-period panel index (0 = early, 1 = late) so L1. gives the early value.
gen order = 0
replace order = 1 if year == 2000

tsset metarea order

* Long differences (late minus early) of the two log shares.
gen ln_fcc_ld = ln_fcc - L1.ln_fcc
gen ln_eeofcc_ld = ln_eeofcc - L1.ln_eeofcc

* Compare the change in this data's central-city share with the change in the
* external series.
reg ln_eeofcc_ld ln_fcc_ld

restore

/*------------------------------------------------------------*/
*CALCULATE MSA-SPECIFIC SLOPES IN EARLY AND LATE PERIODS
/*------------------------------------------------------------*/



preserve

* dist is temporarily overwritten with 1 and 0 below to read the slope off the
* fitted values; temp holds the true distance so it can be put back.
gen temp = dist

* Run the identical procedure for the early (1971-1975) and late (1996-2000)
* windows, producing slope_early and slope_late.
foreach period in early late {

	if "`period'" == "early" {
		local window "year >= 1971 & year <= 1975"
	}
	else {
		local window "year >= 1996 & year <= 2000"
	}

	* Build metarea-by-distance interactions and drop the metarea dummies, so
	* the regression has one distance slope per metro area.
	xi i.metarea|dist, noomit
	drop _Imetarea_*

	areg pblack_n _I* [aw = TOTAL10] if `window', absorb(msaXindXyear)

	* Prediction with dist = 1 minus prediction with dist = 0 leaves each
	* observation's own metro-area slope. The interactions are rebuilt by xi
	* after each change to dist.
	replace dist = 1
	xi i.metarea|dist, noomit
	predict slope_`period', xb

	replace dist = 0
	xi i.metarea|dist, noomit
	predict constant, xb

	replace slope_`period' = slope_`period' - constant
	drop constant

	* Restore the true distance before the next pass.
	replace dist = temp
}

* One row per metro area holding its two slopes.
collapse (mean) slope_early slope_late, by(metarea)

*save msa-specific slopes
saveold "${datadir}pblackpd_slope_msa.dta", replace

restore



/*------------------------------------------------------------*/
*SAVE PREDICTED BLACK SHARE FOR EACH ESTABLISHMENT
/*------------------------------------------------------------*/

preserve

* Pooled distance gradient (displayed in the log only; its estimates are
* replaced by the next regression).
areg pblack_n dist ln_size, absorb(msaXindXyear) cluster(unit_id)

* Metro-area-specific distance slopes plus log size; predictions come from
* this model.
xi i.metarea|dist, noomit
drop _Imetarea_*
areg pblack_n _I* ln_size, absorb(msaXindXyear)


* Predict with log size set to zero so the fitted value excludes the size
* term, then put the real log size back.
tempvar size_hold
gen `size_hold' = ln_size
replace ln_size = 0
predict pblackpd, xb
*replace pblackpd = pblackpd*pblack_msa

replace ln_size = `size_hold'
drop `size_hold'


* Keep identifiers, distance, and actual and predicted shares, and save.
keep pblackpd dist UNIT_NBR pblack_n pblack_msa year

save "${datadir}pblackpd_all.dta", replace

restore




/*------------------------------------------------------------*/
*WITHIN OCCUPATION ANALYSIS
/*------------------------------------------------------------*/


* Black share of employment within each of the nine occupation groups, in
* percent, capped at 100.
* Stata's min() ignores missing arguments, so when BLKT/TOTAL is missing
* (TOTAL is zero or missing, or BLKT is missing) min(., 1) returns 1 and the
* share would be recorded as 100. All of those cases are set to missing;
* previously only TOTAL == 0 was handled.
forvalues i = 1(1)9 {
	gen pblack_`i' = 100*min(BLKT`i'/TOTAL`i', 1)
	replace pblack_`i' = . if TOTAL`i' == 0 | missing(BLKT`i', TOTAL`i')
}

* The establishment-level normalized share is dropped here and rebuilt at the
* occupation level at the end of this section.
drop pblack_n

keep pblack_* TOTAL* unit_id year ln_size metarea dist pblack_msa SIC1 era firm_id

* Establishment employment is kept as size; TOTAL10-TOTAL13 are then removed
* before the occupation columns are reshaped.
gen size = TOTAL10
drop TOTAL10 TOTAL11 TOTAL12 TOTAL13

* Employment in each skill tier: occupations 1-3, 4-7, and 8-9.
gen hs_total = TOTAL1 + TOTAL2 + TOTAL3
gen ms_total = TOTAL4 + TOTAL5 + TOTAL6 + TOTAL7
gen ls_total = TOTAL8 + TOTAL9


* One row per establishment, year, and occupation.
reshape long pblack_ TOTAL, i(unit_id year) j(occ)

egen job_id = group(unit_id occ)

drop if occ > 9

* Occupation-cell employment and its log.
gen jobtot = TOTAL
gen ln_jobtot = ln(jobtot)

* Cell employment as a share of the establishment and of each skill tier.
gen job_weight = jobtot/size

foreach tier in hs ms ls {
	gen `tier'_weight = jobtot/`tier'_total
}

* Skill-tier indicators for the occupation in this row.
gen hs = inlist(occ, 1, 2, 3)
gen ms = inlist(occ, 4, 5, 6, 7)
gen ls = inlist(occ, 8, 9)


* Fixed-effect cells at the occupation level.
egen msaXjobXyear     = group(metarea occ SIC1 year)
egen msaXfirmjobXyear = group(metarea occ firm_id year)

* Occupation-level black share relative to the metro-area-year black share.
gen pblack_n = pblack_/pblack_msa

/*------------------------------------------------------------*/
/*------------------------------------------------------------*/
*TABLE 1 (continued): Distance from CBD and Black Share of Employees
/*------------------------------------------------------------*/
/*------------------------------------------------------------*/

* Pooled occupation-level specifications, weighted by cell employment and
* clustered by establishment: first with MSA x occupation x industry x year
* cells absorbed, then MSA x occupation x firm x year cells.
foreach cell in msaXjobXyear msaXfirmjobXyear {
	areg pblack_n dist ln_size [aw = jobtot], absorb(`cell') cluster(unit_id)
}

* Same specification, one era at a time.
forvalues e = 1/3 {
	areg pblack_n dist ln_size if era == `e' [aw = jobtot], absorb(msaXjobXyear) cluster(unit_id)
}

*split by occupation skill level
* These use the un-normalized share (pblack_), tier-specific weights, and no
* clustering, run in the order low, middle, high skill.
foreach tier in ls ms hs {
	areg pblack_ dist ln_size [aw = `tier'_weight] if `tier', absorb(msaXjobXyear)
}


log close
